# !python3      # 使用工具:PyCharm
# 用户登录名:yyds_l
# 项目名称: python爬虫     文件名称:淘宝女装数据
# 创建日期:2022/3/30      创建时间:9:49
# 2022/03/30 09:49

"""ok"""


# 页码网址筛选
def html_url(start_page=None, end_page=None):
    """Build the Taobao search-result URLs for an inclusive range of pages.

    Args:
        start_page: 1-based number of the first page to build. When ``None``
            the value is read interactively with ``input()``.
        end_page: 1-based number of the last page to build (inclusive). When
            ``None`` the value is read interactively with ``input()``.

    Returns:
        A list with one URL string per page, in ascending page order. The
        list is empty when ``end_page`` is smaller than ``start_page``.

    Raises:
        ValueError: If a value cannot be converted to ``int`` or if the
            start page is smaller than 1.
    """
    if start_page is None:
        start_page = input("请输入开始页码：")
    first_page = int(start_page)
    if end_page is None:
        end_page = input('请输入结束页码：')
    last_page = int(end_page)

    if first_page < 1:
        raise ValueError("start_page must be >= 1, got {}".format(first_page))

    base_url = (
        "https://s.taobao.com/search?q=%E5%A5%B3%E8%A3%85&imgfile=&commend=all"
        "&ssid=s5-e&search_type=item&sourceId=tb.index"
        "&spm=a21bo.jianhua.201856-taobao-item.1"
        "&ie=utf8&initiative_id=tbindexz_20170306"
    )
    # The `s` query parameter advances by 44 for every page after the first
    # (presumably the number of items per result page).
    step = 44

    urls = []
    for page in range(first_page, last_page + 1):
        if page == 1:
            # The first page is the plain search URL without paging parameters.
            urls.append(base_url)
            continue
        # bcoffset/ntoffset start at 1 on page 2 and drop by 3 per page
        # (1, -2, -5, ...). Using the real page number keeps the parameters
        # correct when the requested range does not start at page 1.
        offset = 1 - 3 * (page - 2)
        urls.append(
            "{}&bcoffset={}&ntoffset={}&p4ppushleft=2%2C48&s={}".format(
                base_url, offset, offset, step * (page - 1)
            )
        )
    return urls


# 访问网址，获取数据
def open_html(data_open_url, headers):
    """Download every URL and return the response bodies as text.

    Relies on the ``requests`` module being available as a module-level
    name at call time.

    Args:
        data_open_url: Sequence of URL strings to fetch, in order.
        headers: Mapping of HTTP request headers sent with every request.

    Returns:
        A list with the text of each response that could be fetched, in
        request order. A URL whose request fails is reported on stdout and
        skipped, so the list may be shorter than ``data_open_url``; it is
        always a list, never ``None``.
    """
    pages = []
    for url in data_open_url:
        try:
            # A timeout keeps one stalled connection from hanging the run.
            response = requests.get(url=url, headers=headers, timeout=10)
        except requests.RequestException as err:
            # Best effort: report the failure and keep the pages fetched so far.
            print(err)
            continue
        pages.append(response.text)
    return pages


# 数据筛选
def html_data(o_data):
    """Print every ``"raw_title":"..."`` fragment found in the given pages.

    Args:
        o_data: Iterable of page-source strings to scan. ``None`` or an
            empty sequence is accepted and produces no output.

    Returns:
        None. Each match is written to stdout on its own line, page by page
        and in order of appearance within a page.
    """
    if not o_data:
        return

    # Compile once; the same pattern is applied to every page.
    title_pattern = re.compile(r'("raw_title":".*?")')
    for page_source in o_data:
        for fragment in title_pattern.findall(page_source):
            print(fragment)


# TODO: save the extracted data (not implemented yet)


# 运行
if __name__ == '__main__':
    # Imported here, at module scope of the running script, so that
    # open_html() and html_data() can resolve `requests` and `re` as globals.
    import re
    import requests

    # HTTP headers sent with every request.
    # NOTE(review): the cookie is hard-coded and looks like a captured browser
    # session; it will presumably expire -- confirm, and consider loading it
    # from outside the source file.
    headers = {}
    headers['user-agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36'
    headers["cookie"] = "t=91c7c8d5f0da658839d462b6d9813cdc; cna=VokSGv+ovm0CAbfWrUM95zoO; _samesite_flag_=true; cookie2=15939c970500d0b030568aa782e5e8b4; _tb_token_=e09ee566b6e33; xlly_s=1; sgcookie=E100hmOqIYWlN4wDNUtNREsXZlsxbCP8iFENmRs2SlgRWbPxqokqtylkUuArF37TROknirJZjdGq6J6Asl3y77VYvaXvNSNtMWe54lI1m6ogvp25sKQvLfN8vrtX6g7y87Ti; unb=3478271834; uc3=id2=UNQwXXYWjf9NHQ%3D%3D&vt3=F8dCvCtNiwtT7hB5zCQ%3D&lg2=UIHiLt3xD8xYTw%3D%3D&nk2=3RPXZv78Czu6iv5n; csg=f47650de; lgc=%5Cu5C31%5Cu597D%5Cu563F%5Cu563F5364; cancelledSubSites=empty; cookie17=UNQwXXYWjf9NHQ%3D%3D; dnk=%5Cu5C31%5Cu597D%5Cu563F%5Cu563F5364; skt=07c89ed0b10449d1; existShop=MTY0ODYwMzY0OQ%3D%3D; uc4=nk4=0%4035g9ircNtoj3cSkg4yHOxrI5g9uCvsU%3D&id4=0%40UgP7imTcwWJchHOST%2FQhrqCHy003; tracknick=%5Cu5C31%5Cu597D%5Cu563F%5Cu563F5364; _cc_=WqG3DMC9EA%3D%3D; _l_g_=Ug%3D%3D; sg=441; _nk_=%5Cu5C31%5Cu597D%5Cu563F%5Cu563F5364; cookie1=BxNYLhmS7fIAvSTGCWqRRIijaDU%2FpoO98cUMC%2F7OU24%3D; _m_h5_tk=de0c2437debb270e04c0ab9ce34d3d1d_1648611210841; _m_h5_tk_enc=8d9bbfe482c5d0b20765d6fb72782f41; mt=ci=116_1; thw=cn; uc1=cookie14=UoewCLe8GWbD2A%3D%3D&cookie15=UtASsssmOIJ0bQ%3D%3D&cookie16=UIHiLt3xCS3yM2h4eKHS9lpEOw%3D%3D&existShop=true&pas=0&cookie21=URm48syIZJfmZ9wVCtpzEQ%3D%3D; enc=FqFkjZf7Ia3JrAYRicoCCh6gDL2h2i1C94lQJ%2F4cvjmbe8yjcpMdyNH4YMiVWI48TIoJBtRwIDUdJOvLqRhRMw%3D%3D; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; JSESSIONID=88B7A1A9D30B56CB9CE9ACCA61886372; tfstk=cL5RBJ_VOoquBy8A7TecAVbo3qfdZVG2x4tZ9r2OOfDxDFhdiTfG6Ed7FFKw2rC..; l=eBQE89keLYefpqYBBOfaFurza7keIIRvmuPzaNbMiOCP9wC65z7cW6VDrV8BCnFVh6leR3yxiRLwBeYBqHtInxvte5DDwQHmn; isg=BIeH7iBYqV0RJy3R-F6VE-RwFjtRjFtumDPoO1l0oZY9yKeKYV4pvseAb8gzFDPm"

    # Pipeline: ask for the page range, download each page, print the titles.
    data_url = html_url()
    o_data = open_html(data_url, headers)
    html_data(o_data)

# https://s.taobao.com/search?q=%E5%A5%B3%E8%A3%85&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.jianhua.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306
# https://s.taobao.com/search?q=%E5%A5%B3%E8%A3%85&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.jianhua.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s=44
# https://s.taobao.com/search?q=%E5%A5%B3%E8%A3%85&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.jianhua.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306&bcoffset=-2&ntoffset=-2&p4ppushleft=2%2C48&s=88
# https://s.taobao.com/search?q=%E5%A5%B3%E8%A3%85&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.jianhua.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306&bcoffset=-5&ntoffset=-5&p4ppushleft=2%2C48&s=132
